Overview of Plotly

# Essential imports for constructing data and plotting
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import plotly.offline as py
import plotly.graph_objs as go 
import plotly.figure_factory as ff 

Plot with Matplotlib

# Create fake dataset to plot 
df = pd.DataFrame(np.random.rand(100, 4), columns=['A', 'B', 'C', 'D'])
df.plot()
plt.legend(loc='upper right')
plt.show()
../_images/P00-Plotly Basic Overview_3_0.png

Plot with Plotly

# Create a fake dataset to show in plot 
df = pd.DataFrame(np.random.rand(100, 4), columns=['A', 'B', 'C', 'D'])
data = ([
    {'x': df.index, 'y': df[col], 'name': col} for col in df.columns
])

# Create figure 
fig = go.Figure(data=data)
# Show figure 
py.iplot(fig)

Scatter Plot

np.random.seed(42)
random_x = np.random.randint(1, 101, 100)
random_y = np.random.randint(1, 101, 100)


data = [go.Scatter(
    x = random_x,
    y = random_y, 
    mode= 'markers'
)] 

fig = go.Figure(data=data)
fig.show() 

Layout

# Set random values for plotting 
np.random.seed(42)
random_x = np.random.randint(1, 101, 100)
random_y = np.random.randint(1, 101, 100)

# Create data and plot 
data = [go.Scatter(
    x = random_x,
    y = random_y, 
    mode = 'markers'
)]


# Create layout 
layout = go.Layout(
    title = 'Random Data Scatterplot', # Graph title
    xaxis = dict(title = 'Some random x-values'), # x-axis label
    yaxis = dict(title = 'Some random y-values'), # y-axis label
    hovermode ='closest' # handles multiple points landing on the same vertical
)

# Creare figure 
fig = go.Figure(data=data, layout=layout)

# Show figure 
fig.show() 

Customization

# Set random values for plotting 
np.random.seed(42)
random_x = np.random.randint(1, 101, 100)
random_y = np.random.randint(1, 101, 100)

# Create data and plot 
data = [go.Scatter(
    x = random_x,
    y = random_y, 
    mode = 'markers',
    # Change marker style 
    marker = dict(
        size = 12,
        color = 'rgb(51, 204, 153)',
        symbol = 'pentagon', 
        line = dict(
            width = 2, 
        )
    )
)]

# Create layout 
layout = go.Layout(
    title = 'Random Data Scatterplot', # Graph title
    xaxis = dict(title = 'Some random x-values'), # x-axis label
    yaxis = dict(title = 'Some random y-values'), # y-axis label
    hovermode ='closest' # handles multiple points landing on the same vertical
)

# Create figure
fig = go.Figure(data=data, layout=layout)

# Show figure 
fig.show()

Line Charts

np.random.seed(56)
x_values = np.linspace(0, 1, 100) # 100 evenly spaced values
y_values = np.random.randn(100) # 100 random values

# Create traces 
trace0 = go.Scatter(
    x = x_values, 
    y = y_values+5,
    mode= 'markers',
    name = 'markers'
)

trace1 = go.Scatter(
    x = x_values, 
    y = y_values,
    mode= 'lines+markers', 
    name = 'lines+markers'
)

trace2 = go.Scatter(
    x = x_values, 
    y = y_values-5, 
    mode= 'lines', 
    name = 'lines'
)

# Create data 
data = [trace0, trace1, trace2]

# Create layout 
layout = go.Layout(
    title = 'Line charts showing different methods.'
)

# Create 
fig = go.Figure(data=data, layout=layout)

# Show plot 
fig.show()

Reading Data and Plotting

# Read a csv file from local computer 
df = pd.read_csv("../data/population.csv", index_col=0)
# Examine first few rows
df.head()
PopEstimate2010 PopEstimate2011 PopEstimate2012 PopEstimate2013 PopEstimate2014 PopEstimate2015 PopEstimate2016 PopEstimate2017
Name
Connecticut 3580171 3591927 3597705 3602470 3600188 3593862 3587685 3588184
Maine 1327568 1327968 1328101 1327975 1328903 1327787 1330232 1335907
Massachusetts 6564943 6612178 6659627 6711138 6757925 6794002 6823721 6859819
New Hampshire 1316700 1318345 1320923 1322622 1328684 1330134 1335015 1342795
Rhode Island 1053169 1052154 1052761 1052784 1054782 1055916 1057566 1059639
# Create traces 
traces = [go.Scatter(
    x = df.columns, 
    y = df.loc[name], 
    mode = 'markers+lines', 
    name = name 
)for name in df.index]

# Create layout 
layout = go.Layout(
    title = 'Population Estimates of the Six New England States.'
)

# Create figure 
fig = go.Figure(data=traces, layout=layout)

# Show figure 
fig.show()

Bar Charts

# Read data 
df = pd.read_csv("../data/2018WinterOlympics.csv")
# Examine first few rows 
df.head()
Rank NOC Gold Silver Bronze Total
0 1 Norway 14 14 11 39
1 2 Germany 14 10 7 31
2 3 Canada 11 8 10 29
3 4 United States 9 8 6 23
4 5 Netherlands 8 6 6 20

Create Simple Bar Plot

# Create traces 
data = [go.Bar(
    x = df['NOC'], # NOC stands for National Olympic Committee
    y = df['Total'], 
)]

# Create layout 
layout = go.Layout(
    title = '2018 Winter Olympic Medals by Country.'
)

# Create figure 
fig = go.Figure(data=data, layout=layout)

# Show figure 
fig.show()
# Take a look at data again..
df.head()
Rank NOC Gold Silver Bronze Total
0 1 Norway 14 14 11 39
1 2 Germany 14 10 7 31
2 3 Canada 11 8 10 29
3 4 United States 9 8 6 23
4 5 Netherlands 8 6 6 20

Comparison between Variables

# Create traces 
trace1 = go.Bar(
    x = df['NOC'], 
    y = df['Gold'], 
    name = 'Gold', 
    marker = dict(color='#FFD700')
)

trace2 = go.Bar(
    x = df['NOC'], 
    y = df['Silver'], 
    name = 'Silver', 
    marker = dict(color='#9EA0A1')
)

trace3 = go.Bar(
    x = df['NOC'], 
    y = df['Bronze'], 
    name = 'Bronze', 
    marker = dict(color='#CD5F32')
)

# Store traces in data 
data = [trace1, trace2, trace3]

# Create layout 
layout = go.Layout(
    title = '2018 Winter Olympic Medals by Country.'
)

# Create figure 
fig = go.Figure(data=data, layout=layout)

# Show figure 
fig.show()

Stacked Bar Plot

# Create traces 
trace1 = go.Bar(
    x = df['NOC'], 
    y = df['Gold'], 
    name = 'Gold', 
    marker = dict(color='#FFD700')
)

trace2 = go.Bar(
    x = df['NOC'], 
    y = df['Silver'], 
    name = 'Silver', 
    marker = dict(color='#9EA0A1')
)

trace3 = go.Bar(
    x = df['NOC'], 
    y = df['Bronze'], 
    name = 'Bronze', 
    marker = dict(color='#CD5F32')
)

# Store traces in data 
data = [trace1, trace2, trace3]

# Create layout 
layout = go.Layout(
    title = '2018 Winter Olympic Medals by Country.', 
    barmode = 'stack'
)

# Create figure 
fig = go.Figure(data=data, layout=layout)

# Show figure 
fig.show()

Bubble Plot

# Read data 
df = pd.read_csv("../data/mpg.csv")
# Examine first few rows 
df.head()
mpg cylinders displacement horsepower weight acceleration model_year origin name
0 18.0 8 307.0 130 3504 12.0 70 1 chevrolet chevelle malibu
1 15.0 8 350.0 165 3693 11.5 70 1 buick skylark 320
2 18.0 8 318.0 150 3436 11.0 70 1 plymouth satellite
3 16.0 8 304.0 150 3433 12.0 70 1 amc rebel sst
4 17.0 8 302.0 140 3449 10.5 70 1 ford torino
data =  go.Scatter(
    x = df['horsepower'], 
    y = df['mpg'],
    text= df['name'], 
    mode = 'markers', 
    marker= dict(size=1.5 * df['cylinders']) # set the marker size 
)

layout = go.Layout(
    title = 'Veichke mpg vs. horsepower.', 
    xaxis = dict(title='horsepower'), 
    yaxis = dict(title='mpg'), 
    hovermode= 'closest'
)

# Create figure 
fig = go.Figure(data=data, layout=layout)

# Show plot 
fig.show()

Boxplots

# Load iris dataset
import seaborn as sns 
iris = sns.load_dataset('iris')
# Examine first few rows 
iris.head()
sepal_length sepal_width petal_length petal_width species
0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
2 4.7 3.2 1.3 0.2 setosa
3 4.6 3.1 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa

Single Boxplot

data = go.Box(
    x = iris['species'], 
    y = iris['sepal_length'], 
    boxpoints= 'all', # display the original data points
    jitter=0.3, # spread them out so they all appear
    pointpos=-1.8    # offset them to the left of the box

)

# Create layout 

layout = go.Layout(
    title = 'Boxplot of Species and Sepal Length', 
    xaxis = dict(title='Species'), 
    yaxis = dict(title='Sepal Length')
)

# Create figure 
fig = go.Figure(data=data, layout=layout)

# Show figure 
fig.show()

Show Outliers

data = go.Box(
    x = iris['species'], 
    y = iris['petal_length'], 
    boxpoints= 'outliers'
)

# Create layout 

layout = go.Layout(
    title = 'Boxplot of Species and Petal Length', 
    xaxis = dict(title='Species'), 
    yaxis = dict(title='Petal Length'), 
)

# Create figure 
fig = go.Figure(data=data, layout=layout)

# Show figure 
fig.show()

Grouped Boxplot

# Trace-1 
trace1 = go.Box(
    x = iris['species'], 
    y = iris['petal_length'], 
    boxpoints= 'outliers', 
    name = 'Petal Length'
)

# Trace-2 
trace2 = go.Box(
    x = iris['species'], 
    y = iris['petal_width'], 
    boxpoints= 'outliers', 
    name = 'Petal Width'

)

# Trace-3 
trace3 = go.Box(
    x = iris['species'], 
    y = iris['sepal_length'], 
    boxpoints= 'outliers', 
    name = 'Sepal Length'

)

# Trace-4 
trace4 = go.Box(
    x = iris['species'], 
    y = iris['sepal_width'], 
    boxpoints= 'outliers', 
    name = 'Sepal Width'

)

# Create layout 
layout = go.Layout(
    title = 'Boxplot of Iris Dataset', 
)

# Create data 
data = [trace1, trace2, trace3, trace4]

# Create figure 
fig = go.Figure(data=data, layout=layout)

# Show figure 
fig.show()

Histograms

# Create data 
data = go.Histogram(
    x = iris['sepal_length'], 
)

# Create layout 
layout = go.Layout(
    title = 'Histogram of Sepal Length', 
    
)

# Create figure 
fig = go.Figure(data=data, layout=layout)

# Show figure 
fig.show()
df = pd.read_csv("../data/mpg.csv")
df.head()
mpg cylinders displacement horsepower weight acceleration model_year origin name
0 18.0 8 307.0 130 3504 12.0 70 1 chevrolet chevelle malibu
1 15.0 8 350.0 165 3693 11.5 70 1 buick skylark 320
2 18.0 8 318.0 150 3436 11.0 70 1 plymouth satellite
3 16.0 8 304.0 150 3433 12.0 70 1 amc rebel sst
4 17.0 8 302.0 140 3449 10.5 70 1 ford torino
data = go.Histogram(
    x = df['mpg'], 
    xbins=dict(start=8, end=50, size=6)
)

layout = go.Layout(
    title = 'Histogram of MPG', 
)

fig = go.Figure(data=data, layout=layout)
fig.show()
data = go.Histogram(
    x = df['mpg'], 
    xbins=dict(start=8, end=50, size=1)
)

layout = go.Layout(
    title = 'Histogram of MPG', 
)

fig = go.Figure(data=data, layout=layout)
fig.show()
data = go.Histogram(
    x = df['mpg'], 
    xbins=dict(start=8, end=50, size=.5)
)

layout = go.Layout(
    title = 'Histogram of MPG', 
)

fig = go.Figure(data=data, layout=layout)
fig.show()
df = pd.read_csv("../data/arrhythmia.csv")
df.head()
Age Sex Height
0 68 1 146
1 58 1 148
2 36 1 149
3 34 1 150
4 40 1 150
data = [go.Histogram(
    x=df[df['Sex']==0]['Height'],
    opacity=0.75,
    name = 'Male'
),
go.Histogram(
    x=df[df['Sex']==1]['Height'],
    opacity=0.75,
    name = 'Female'
)]


layout = go.Layout(
    title = 'Height Comparison by Gender',
    barmode='overlay'
)


fig = go.Figure(data=data, layout=layout)
fig.show()
x = np.random.randn(1000)
hist_data = [x]
group_labels = ['distplot']

fig = ff.create_distplot(hist_data, group_labels)
fig.show()

Heatmaps

df = pd.read_csv("../data/2010SantaBarbaraCA.csv")
df.head()
LST_DATE DAY LST_TIME T_HR_AVG
0 20100601 TUESDAY 0:00 12.7
1 20100601 TUESDAY 1:00 12.7
2 20100601 TUESDAY 2:00 12.3
3 20100601 TUESDAY 3:00 12.5
4 20100601 TUESDAY 4:00 12.7
# Create X, Y, Z data 
data = [go.Heatmap(
    x = df['DAY'], 
    y = df['LST_TIME'], 
    z = df['T_HR_AVG'].values.tolist(), 
    colorscale= 'Jet'
)]

# Create layout
layout = go.Layout(
    title = 'Hourly Temperatures, June 1-7, 2010', 
)

# Create figure
fig = go.Figure(data=data, layout=layout)
fig.show()